The aim of this competition is to predict the sale price of each property. The target variable is called price_doc in train.csv.
The training data is from August 2011 to June 2015, and the test set is from July 2015 to May 2016. The dataset also includes information about overall conditions in Russia's economy and finance sector, so you can focus on generating accurate price forecasts for individual properties, without needing to second-guess what the business cycle will do.
Data Files
train.csv, test.csv: information about individual transactions. The rows are indexed by the "id" field, which refers to individual transactions (particular properties might appear more than once, in separate transactions). These files also include supplementary information about the local area of each property. macro.csv: data on Russia's macroeconomy and financial sector (could be joined to the train and test sets on the "timestamp" column) sample_submission.csv: an example submission file in the correct format data_dictionary.txt: explanations of the fields available in the other data files
In [81]:
# Load libraries
import numpy
from numpy import arange
from matplotlib import pyplot
from pandas import read_csv
from pandas import set_option
from pandas.tools.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals()) #this line makes it easy to work with locals and globals
In [40]:
filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/train.csv'
train = read_csv(filename, header = 0)
filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/test.csv'
test = read_csv(filename, header = 0)
filename = 'C:/Users/usjry/Documents/GitHub/ml_practice_notebooks/housing market/macro.csv'
macro = read_csv(filename, header = 0)
In [45]:
print(train.head(10))
id timestamp full_sq life_sq floor max_floor material build_year \
0 1 2011-08-20 43 27.0 4.0 NaN NaN NaN
1 2 2011-08-23 34 19.0 3.0 NaN NaN NaN
2 3 2011-08-27 43 29.0 2.0 NaN NaN NaN
3 4 2011-09-01 89 50.0 9.0 NaN NaN NaN
4 5 2011-09-05 77 77.0 4.0 NaN NaN NaN
5 6 2011-09-06 67 46.0 14.0 NaN NaN NaN
6 7 2011-09-08 25 14.0 10.0 NaN NaN NaN
7 8 2011-09-09 44 44.0 5.0 NaN NaN NaN
8 9 2011-09-10 42 27.0 5.0 NaN NaN NaN
9 10 2011-09-13 36 21.0 9.0 NaN NaN NaN
num_room kitch_sq ... cafe_count_5000_price_2500 \
0 NaN NaN ... 9
1 NaN NaN ... 15
2 NaN NaN ... 10
3 NaN NaN ... 11
4 NaN NaN ... 319
5 NaN NaN ... 62
6 NaN NaN ... 81
7 NaN NaN ... 9
8 NaN NaN ... 19
9 NaN NaN ... 19
cafe_count_5000_price_4000 cafe_count_5000_price_high \
0 4 0
1 3 0
2 3 0
3 2 1
4 108 17
5 14 1
6 16 3
7 4 0
8 8 1
9 13 0
big_church_count_5000 church_count_5000 mosque_count_5000 \
0 13 22 1
1 15 29 1
2 11 27 0
3 4 4 0
4 135 236 2
5 53 78 1
6 38 80 1
7 11 18 1
8 18 34 1
9 10 20 1
leisure_count_5000 sport_count_5000 market_count_5000 price_doc
0 0 52 4 5850000
1 10 66 14 6000000
2 4 67 10 5700000
3 0 26 3 13100000
4 91 195 14 16331452
5 20 113 17 9100000
6 27 127 8 5500000
7 0 47 4 2000000
8 3 85 11 5300000
9 3 67 1 2000000
[10 rows x 292 columns]
In [49]:
dataset = train
In [50]:
# Summarize Data
# Descriptive statistics
# shape
print(dataset.shape)
(30471, 292)
In [51]:
# types
print(dataset.dtypes)
id int64
timestamp object
full_sq int64
life_sq float64
floor float64
max_floor float64
material float64
build_year float64
num_room float64
kitch_sq float64
state float64
product_type object
sub_area object
area_m float64
raion_popul int64
green_zone_part float64
indust_part float64
children_preschool int64
preschool_quota float64
preschool_education_centers_raion int64
children_school int64
school_quota float64
school_education_centers_raion int64
school_education_centers_top_20_raion int64
hospital_beds_raion float64
healthcare_centers_raion int64
university_top_20_raion int64
sport_objects_raion int64
additional_education_raion int64
culture_objects_top_25 object
...
big_church_count_3000 int64
church_count_3000 int64
mosque_count_3000 int64
leisure_count_3000 int64
sport_count_3000 int64
market_count_3000 int64
green_part_5000 float64
prom_part_5000 float64
office_count_5000 int64
office_sqm_5000 int64
trc_count_5000 int64
trc_sqm_5000 int64
cafe_count_5000 int64
cafe_sum_5000_min_price_avg float64
cafe_sum_5000_max_price_avg float64
cafe_avg_price_5000 float64
cafe_count_5000_na_price int64
cafe_count_5000_price_500 int64
cafe_count_5000_price_1000 int64
cafe_count_5000_price_1500 int64
cafe_count_5000_price_2500 int64
cafe_count_5000_price_4000 int64
cafe_count_5000_price_high int64
big_church_count_5000 int64
church_count_5000 int64
mosque_count_5000 int64
leisure_count_5000 int64
sport_count_5000 int64
market_count_5000 int64
price_doc int64
dtype: object
In [52]:
# head
print(dataset.head(20))
id timestamp full_sq life_sq floor max_floor material build_year \
0 1 2011-08-20 43 27.0 4.0 NaN NaN NaN
1 2 2011-08-23 34 19.0 3.0 NaN NaN NaN
2 3 2011-08-27 43 29.0 2.0 NaN NaN NaN
3 4 2011-09-01 89 50.0 9.0 NaN NaN NaN
4 5 2011-09-05 77 77.0 4.0 NaN NaN NaN
5 6 2011-09-06 67 46.0 14.0 NaN NaN NaN
6 7 2011-09-08 25 14.0 10.0 NaN NaN NaN
7 8 2011-09-09 44 44.0 5.0 NaN NaN NaN
8 9 2011-09-10 42 27.0 5.0 NaN NaN NaN
9 10 2011-09-13 36 21.0 9.0 NaN NaN NaN
10 11 2011-09-16 36 19.0 12.0 NaN NaN NaN
11 12 2011-09-16 38 19.0 11.0 NaN NaN NaN
12 13 2011-09-17 43 28.0 4.0 NaN NaN NaN
13 14 2011-09-19 31 31.0 4.0 NaN NaN NaN
14 15 2011-09-19 31 21.0 3.0 NaN NaN NaN
15 16 2011-09-20 51 31.0 15.0 NaN NaN NaN
16 17 2011-09-20 47 31.0 4.0 NaN NaN NaN
17 18 2011-09-20 42 28.0 2.0 NaN NaN NaN
18 19 2011-09-22 59 33.0 10.0 NaN NaN NaN
19 20 2011-09-22 44 29.0 4.0 NaN NaN NaN
num_room kitch_sq ... cafe_count_5000_price_2500 \
0 NaN NaN ... 9
1 NaN NaN ... 15
2 NaN NaN ... 10
3 NaN NaN ... 11
4 NaN NaN ... 319
5 NaN NaN ... 62
6 NaN NaN ... 81
7 NaN NaN ... 9
8 NaN NaN ... 19
9 NaN NaN ... 19
10 NaN NaN ... 1
11 NaN NaN ... 8
12 NaN NaN ... 13
13 NaN NaN ... 254
14 NaN NaN ... 88
15 NaN NaN ... 6
16 NaN NaN ... 10
17 NaN NaN ... 32
18 NaN NaN ... 1
19 NaN NaN ... 9
cafe_count_5000_price_4000 cafe_count_5000_price_high \
0 4 0
1 3 0
2 3 0
3 2 1
4 108 17
5 14 1
6 16 3
7 4 0
8 8 1
9 13 0
10 1 0
11 3 0
12 9 1
13 108 22
14 19 2
15 1 0
16 2 0
17 6 0
18 1 0
19 2 0
big_church_count_5000 church_count_5000 mosque_count_5000 \
0 13 22 1
1 15 29 1
2 11 27 0
3 4 4 0
4 135 236 2
5 53 78 1
6 38 80 1
7 11 18 1
8 18 34 1
9 10 20 1
10 5 9 0
11 10 9 0
12 7 15 0
13 57 102 1
14 63 100 0
15 9 21 0
16 7 23 0
17 13 33 1
18 6 9 0
19 10 14 0
leisure_count_5000 sport_count_5000 market_count_5000 price_doc
0 0 52 4 5850000
1 10 66 14 6000000
2 4 67 10 5700000
3 0 26 3 13100000
4 91 195 14 16331452
5 20 113 17 9100000
6 27 127 8 5500000
7 0 47 4 2000000
8 3 85 11 5300000
9 3 67 1 2000000
10 2 17 6 4650000
11 0 35 4 4800000
12 2 47 0 5100000
13 72 166 7 5200000
14 28 132 14 5000000
15 1 53 9 1850000
16 4 62 13 6300000
17 10 72 12 5900000
18 2 17 6 7900000
19 2 51 5 5200000
[20 rows x 292 columns]
In [53]:
# descriptions, change precision to 2 places
set_option('precision', 1)
print(dataset.describe())
id full_sq life_sq floor max_floor material build_year \
count 30471.0 30471.0 24088.0 30304.0 20899.0 20899.0 1.7e+04
mean 15237.9 54.2 34.4 7.7 12.6 1.8 3.1e+03
std 8796.5 38.0 52.3 5.3 6.8 1.5 1.5e+05
min 1.0 0.0 0.0 0.0 0.0 1.0 0.0e+00
25% 7620.5 38.0 20.0 3.0 9.0 1.0 2.0e+03
50% 15238.0 49.0 30.0 6.5 12.0 1.0 2.0e+03
75% 22855.5 63.0 43.0 11.0 17.0 2.0 2.0e+03
max 30473.0 5326.0 7478.0 77.0 117.0 6.0 2.0e+07
num_room kitch_sq state ... cafe_count_5000_price_2500 \
count 20899.0 20899.0 16912.0 ... 30471.0
mean 1.9 6.4 2.1 ... 32.1
std 0.9 28.3 0.9 ... 73.5
min 0.0 0.0 1.0 ... 0.0
25% 1.0 1.0 1.0 ... 2.0
50% 2.0 6.0 2.0 ... 8.0
75% 2.0 9.0 3.0 ... 21.0
max 19.0 2014.0 33.0 ... 377.0
cafe_count_5000_price_4000 cafe_count_5000_price_high \
count 30471.0 30471.0
mean 10.8 1.8
std 28.4 5.4
min 0.0 0.0
25% 1.0 0.0
50% 2.0 0.0
75% 5.0 1.0
max 147.0 30.0
big_church_count_5000 church_count_5000 mosque_count_5000 \
count 30471.0 30471.0 30471.0
mean 15.0 30.3 0.4
std 29.1 47.3 0.6
min 0.0 0.0 0.0
25% 2.0 9.0 0.0
50% 7.0 16.0 0.0
75% 12.0 28.0 1.0
max 151.0 250.0 2.0
leisure_count_5000 sport_count_5000 market_count_5000 price_doc
count 30471.0 30471.0 30471.0 3.0e+04
mean 8.6 52.8 6.0 7.1e+06
std 20.6 46.3 4.9 4.8e+06
min 0.0 0.0 0.0 1.0e+05
25% 0.0 11.0 1.0 4.7e+06
50% 2.0 48.0 5.0 6.3e+06
75% 7.0 76.0 10.0 8.3e+06
max 106.0 218.0 21.0 1.1e+08
[8 rows x 276 columns]
In [54]:
# correlation
set_option('precision', 2)
print(dataset.corr(method='pearson'))
id full_sq life_sq floor \
id 1.00e+00 2.14e-02 1.45e-02 -2.25e-02
full_sq 2.14e-02 1.00e+00 1.61e-01 8.96e-02
life_sq 1.45e-02 1.61e-01 1.00e+00 4.51e-02
floor -2.25e-02 8.96e-02 4.51e-02 1.00e+00
max_floor 7.04e-03 1.19e-01 4.24e-02 4.54e-01
material 8.51e-03 3.29e-02 1.14e-02 -9.04e-03
build_year -1.27e-02 -6.04e-03 -2.40e-03 1.19e-03
num_room 1.44e-02 6.95e-01 1.91e-01 -5.66e-03
kitch_sq -1.04e-02 2.00e-02 6.33e-04 -8.46e-03
state -1.20e-01 -9.13e-02 -6.66e-02 -1.14e-01
area_m 6.36e-02 5.63e-02 6.14e-02 -1.94e-02
raion_popul -3.63e-02 -3.89e-02 -6.40e-02 -4.69e-02
green_zone_part 6.51e-02 3.08e-02 3.71e-02 1.06e-03
indust_part -2.98e-02 -5.36e-02 -3.89e-02 -1.79e-02
children_preschool -3.31e-02 -2.56e-02 -5.31e-02 -1.20e-02
preschool_quota 4.26e-03 -3.57e-03 -3.79e-02 5.54e-02
preschool_education_centers_raion -4.88e-02 -1.07e-02 -5.03e-02 1.32e-02
children_school -3.15e-02 -2.43e-02 -5.16e-02 -1.65e-02
school_quota 6.00e-03 1.63e-02 1.01e-03 8.02e-02
school_education_centers_raion -4.23e-02 -1.21e-02 -5.01e-02 -6.78e-03
school_education_centers_top_20_raion 6.98e-03 2.07e-02 5.13e-03 -9.89e-03
hospital_beds_raion -2.92e-02 -1.60e-03 2.74e-03 -1.02e-01
healthcare_centers_raion -8.65e-03 -7.84e-03 -2.84e-02 -7.67e-02
university_top_20_raion 1.06e-02 4.57e-02 2.49e-02 1.38e-03
sport_objects_raion 3.37e-04 2.81e-02 -1.79e-03 -3.58e-02
additional_education_raion -8.57e-03 7.40e-03 -3.37e-03 4.45e-02
culture_objects_top_25_raion 4.14e-02 2.62e-02 3.38e-02 1.48e-02
shopping_centers_raion 9.47e-03 1.02e-02 -4.09e-03 1.60e-02
office_raion 2.91e-02 3.35e-02 3.21e-02 -1.96e-02
full_all -1.70e-02 1.29e-02 -2.27e-02 -2.81e-02
... ... ... ... ...
big_church_count_3000 1.91e-02 2.89e-02 2.56e-02 -1.65e-02
church_count_3000 2.04e-02 3.07e-02 2.66e-02 -1.14e-02
mosque_count_3000 -2.68e-02 2.36e-02 2.29e-02 6.01e-03
leisure_count_3000 3.02e-02 3.24e-02 3.31e-02 -8.43e-03
sport_count_3000 -1.23e-02 1.06e-02 -9.11e-03 -8.68e-02
market_count_3000 -1.16e-02 -3.53e-02 -4.03e-02 -8.15e-02
green_part_5000 4.17e-02 2.46e-03 2.30e-02 -3.34e-02
prom_part_5000 2.04e-02 -5.84e-02 -5.67e-02 -1.03e-01
office_count_5000 1.73e-02 3.15e-02 2.25e-02 -5.06e-02
office_sqm_5000 2.10e-02 2.61e-02 1.59e-02 -6.43e-02
trc_count_5000 -2.50e-02 6.79e-04 -2.26e-02 -7.81e-02
trc_sqm_5000 -1.54e-02 -6.65e-03 -2.47e-02 -8.27e-02
cafe_count_5000 1.48e-02 3.45e-02 2.35e-02 -4.35e-02
cafe_sum_5000_min_price_avg 4.54e-02 4.89e-02 5.53e-02 2.90e-02
cafe_sum_5000_max_price_avg 4.58e-02 4.82e-02 5.38e-02 3.63e-02
cafe_avg_price_5000 4.57e-02 4.85e-02 5.44e-02 3.34e-02
cafe_count_5000_na_price 1.23e-02 3.31e-02 2.28e-02 -4.62e-02
cafe_count_5000_price_500 1.39e-02 3.06e-02 2.03e-02 -5.11e-02
cafe_count_5000_price_1000 1.22e-02 3.18e-02 1.95e-02 -4.77e-02
cafe_count_5000_price_1500 1.40e-02 3.68e-02 2.49e-02 -3.92e-02
cafe_count_5000_price_2500 1.86e-02 3.91e-02 2.87e-02 -3.48e-02
cafe_count_5000_price_4000 2.30e-02 4.01e-02 3.16e-02 -2.96e-02
cafe_count_5000_price_high 2.37e-02 4.36e-02 3.49e-02 -2.23e-02
big_church_count_5000 1.28e-02 2.69e-02 1.78e-02 -4.44e-02
church_count_5000 1.52e-02 2.86e-02 1.81e-02 -4.56e-02
mosque_count_5000 -4.88e-02 2.16e-02 9.95e-03 -1.23e-02
leisure_count_5000 2.07e-02 3.02e-02 2.27e-02 -4.42e-02
sport_count_5000 -1.17e-02 1.58e-03 -1.45e-02 -1.02e-01
market_count_5000 -2.56e-02 -4.13e-02 -5.03e-02 -1.24e-01
price_doc 1.21e-01 3.42e-01 1.66e-01 1.17e-01
max_floor material build_year \
id 7.04e-03 8.51e-03 -1.27e-02
full_sq 1.19e-01 3.29e-02 -6.04e-03
life_sq 4.24e-02 1.14e-02 -2.40e-03
floor 4.54e-01 -9.04e-03 1.19e-03
max_floor 1.00e+00 4.59e-02 -2.61e-04
material 4.59e-02 1.00e+00 -4.40e-03
build_year -2.61e-04 -4.40e-03 1.00e+00
num_room -1.42e-02 -2.69e-02 -8.25e-03
kitch_sq 2.03e-02 3.87e-02 5.96e-04
state -7.22e-02 -3.42e-02 2.95e-01
area_m -9.30e-02 1.18e-03 -4.57e-03
raion_popul -1.61e-02 -4.93e-02 1.46e-03
green_zone_part 1.41e-02 1.82e-02 -7.21e-03
indust_part -1.55e-02 -2.43e-02 1.13e-03
children_preschool 2.14e-02 -6.20e-02 2.45e-05
preschool_quota 8.49e-02 -1.43e-01 -5.14e-03
preschool_education_centers_raion 4.20e-02 -7.86e-02 -1.08e-03
children_school 1.43e-02 -5.97e-02 5.78e-04
school_quota 9.77e-02 -1.29e-01 -2.85e-03
school_education_centers_raion 1.20e-02 -4.78e-02 -2.46e-04
school_education_centers_top_20_raion -1.43e-02 -1.69e-02 -2.57e-03
hospital_beds_raion -1.53e-01 8.68e-02 -8.99e-03
healthcare_centers_raion -7.04e-02 2.17e-02 7.75e-03
university_top_20_raion 8.81e-03 5.06e-02 -2.50e-03
sport_objects_raion -2.15e-02 3.38e-02 -1.50e-03
additional_education_raion 6.50e-02 -6.39e-02 2.40e-03
culture_objects_top_25_raion 2.30e-02 -1.42e-02 -1.59e-03
shopping_centers_raion 2.56e-02 -5.95e-02 2.46e-03
office_raion -2.06e-02 1.68e-02 5.12e-04
full_all -3.83e-04 2.46e-02 -6.60e-04
... ... ... ...
big_church_count_3000 -1.96e-02 4.94e-03 -1.26e-03
church_count_3000 -1.56e-02 8.16e-03 7.83e-04
mosque_count_3000 -1.23e-02 -9.02e-03 1.42e-02
leisure_count_3000 -1.11e-02 1.96e-03 3.17e-05
sport_count_3000 -8.45e-02 7.28e-02 6.18e-03
market_count_3000 -8.45e-02 4.62e-02 -5.22e-03
green_part_5000 -5.05e-02 -1.99e-03 -4.83e-03
prom_part_5000 -8.20e-02 7.57e-02 -1.48e-03
office_count_5000 -5.54e-02 6.00e-02 4.83e-04
office_sqm_5000 -6.80e-02 9.13e-02 4.29e-03
trc_count_5000 -7.41e-02 6.56e-02 5.40e-03
trc_sqm_5000 -7.28e-02 6.85e-02 4.26e-03
cafe_count_5000 -4.74e-02 5.50e-02 1.98e-03
cafe_sum_5000_min_price_avg -4.00e-02 9.68e-02 3.30e-04
cafe_sum_5000_max_price_avg -2.68e-02 9.12e-02 3.01e-04
cafe_avg_price_5000 -3.20e-02 9.35e-02 3.12e-04
cafe_count_5000_na_price -4.72e-02 5.51e-02 7.62e-04
cafe_count_5000_price_500 -5.37e-02 5.58e-02 1.63e-03
cafe_count_5000_price_1000 -5.09e-02 6.00e-02 3.16e-03
cafe_count_5000_price_1500 -4.48e-02 5.35e-02 2.57e-03
cafe_count_5000_price_2500 -4.02e-02 5.09e-02 1.32e-03
cafe_count_5000_price_4000 -3.37e-02 4.37e-02 -9.06e-04
cafe_count_5000_price_high -2.95e-02 4.44e-02 2.14e-04
big_church_count_5000 -4.82e-02 3.85e-02 -1.59e-03
church_count_5000 -4.82e-02 4.47e-02 -9.90e-05
mosque_count_5000 -5.83e-02 5.04e-02 1.96e-02
leisure_count_5000 -4.87e-02 4.45e-02 -8.30e-04
sport_count_5000 -9.99e-02 9.85e-02 5.98e-03
market_count_5000 -1.13e-01 7.64e-02 7.36e-03
price_doc 9.44e-02 6.40e-02 2.16e-03
num_room kitch_sq state \
id 1.44e-02 -1.04e-02 -1.20e-01
full_sq 6.95e-01 2.00e-02 -9.13e-02
life_sq 1.91e-01 6.33e-04 -6.66e-02
floor -5.66e-03 -8.46e-03 -1.14e-01
max_floor -1.42e-02 2.03e-02 -7.22e-02
material -2.69e-02 3.87e-02 -3.42e-02
build_year -8.25e-03 5.96e-04 2.95e-01
num_room 1.00e+00 1.70e-02 8.20e-02
kitch_sq 1.70e-02 1.00e+00 4.87e-02
state 8.20e-02 4.87e-02 1.00e+00
area_m -4.08e-02 -2.34e-02 -2.95e-01
raion_popul 7.40e-02 3.76e-02 3.74e-01
green_zone_part -2.66e-02 -1.05e-02 -1.60e-01
indust_part -3.67e-02 3.73e-04 5.81e-02
children_preschool 6.35e-02 3.48e-02 3.29e-01
preschool_quota -5.71e-03 1.28e-02 9.89e-02
preschool_education_centers_raion 7.40e-02 3.22e-02 3.14e-01
children_school 6.76e-02 3.36e-02 3.22e-01
school_quota 1.45e-02 5.08e-03 5.92e-02
school_education_centers_raion 8.28e-02 3.40e-02 3.15e-01
school_education_centers_top_20_raion 3.75e-02 -1.67e-03 4.65e-02
hospital_beds_raion 4.85e-02 1.06e-02 1.19e-01
healthcare_centers_raion 6.18e-02 2.84e-02 2.26e-01
university_top_20_raion 4.40e-02 -5.22e-03 -2.26e-02
sport_objects_raion 9.23e-02 2.01e-02 1.76e-01
additional_education_raion 1.92e-02 -3.06e-03 4.33e-02
culture_objects_top_25_raion 5.96e-03 -2.03e-02 -1.25e-01
shopping_centers_raion 6.30e-02 5.56e-03 1.20e-01
office_raion 4.24e-02 -1.37e-02 -5.52e-02
full_all 6.10e-03 1.17e-02 1.02e-01
... ... ... ...
big_church_count_3000 4.32e-02 -1.04e-02 -3.43e-02
church_count_3000 4.26e-02 -1.20e-02 -4.05e-02
mosque_count_3000 3.68e-02 1.11e-03 1.30e-02
leisure_count_3000 3.32e-02 -1.50e-02 -7.80e-02
sport_count_3000 9.47e-02 1.54e-02 1.96e-01
market_count_3000 4.94e-02 1.43e-02 2.11e-01
green_part_5000 -2.71e-02 -1.74e-02 -1.01e-01
prom_part_5000 1.14e-02 1.79e-02 1.77e-01
office_count_5000 6.52e-02 -4.64e-03 8.05e-03
office_sqm_5000 7.53e-02 -5.22e-04 5.04e-02
trc_count_5000 9.58e-02 1.81e-02 2.40e-01
trc_sqm_5000 8.58e-02 1.67e-02 2.31e-01
cafe_count_5000 6.80e-02 -4.41e-03 1.88e-02
cafe_sum_5000_min_price_avg -1.09e-02 -1.39e-02 -2.22e-01
cafe_sum_5000_max_price_avg -1.28e-02 -1.51e-02 -2.25e-01
cafe_avg_price_5000 -1.21e-02 -1.47e-02 -2.24e-01
cafe_count_5000_na_price 6.77e-02 -4.24e-03 2.63e-02
cafe_count_5000_price_500 6.79e-02 -3.63e-03 2.75e-02
cafe_count_5000_price_1000 7.12e-02 -2.44e-03 3.56e-02
cafe_count_5000_price_1500 6.86e-02 -4.66e-03 1.63e-02
cafe_count_5000_price_2500 6.46e-02 -6.76e-03 -4.71e-03
cafe_count_5000_price_4000 5.74e-02 -8.28e-03 -2.28e-02
cafe_count_5000_price_high 5.81e-02 -9.21e-03 -3.15e-02
big_church_count_5000 6.09e-02 -4.00e-03 1.58e-02
church_count_5000 6.52e-02 -3.66e-03 2.30e-02
mosque_count_5000 6.28e-02 1.32e-02 9.19e-02
leisure_count_5000 5.87e-02 -6.26e-03 -3.88e-03
sport_count_5000 8.99e-02 1.69e-02 1.94e-01
market_count_5000 6.17e-02 2.64e-02 2.73e-01
price_doc 4.76e-01 2.87e-02 1.21e-01
... cafe_count_5000_price_2500 \
id ... 1.86e-02
full_sq ... 3.91e-02
life_sq ... 2.87e-02
floor ... -3.48e-02
max_floor ... -4.02e-02
material ... 5.09e-02
build_year ... 1.32e-03
num_room ... 6.46e-02
kitch_sq ... -6.76e-03
state ... -4.71e-03
area_m ... -1.93e-01
raion_popul ... 1.41e-02
green_zone_part ... -2.62e-01
indust_part ... -1.16e-01
children_preschool ... -1.77e-02
preschool_quota ... -2.78e-01
preschool_education_centers_raion ... 2.39e-02
children_school ... 3.52e-02
school_quota ... -1.84e-01
school_education_centers_raion ... 6.39e-02
school_education_centers_top_20_raion ... 3.33e-01
hospital_beds_raion ... 1.91e-01
healthcare_centers_raion ... 3.05e-01
university_top_20_raion ... 6.96e-01
sport_objects_raion ... 6.74e-01
additional_education_raion ... 4.11e-01
culture_objects_top_25_raion ... 7.42e-01
shopping_centers_raion ... 5.57e-01
office_raion ... 9.02e-01
full_all ... 3.20e-04
... ... ...
big_church_count_3000 ... 9.09e-01
church_count_3000 ... 9.15e-01
mosque_count_3000 ... 5.53e-01
leisure_count_3000 ... 8.93e-01
sport_count_3000 ... 8.15e-01
market_count_3000 ... 3.52e-01
green_part_5000 ... -3.81e-01
prom_part_5000 ... 6.78e-02
office_count_5000 ... 9.87e-01
office_sqm_5000 ... 9.43e-01
trc_count_5000 ... 7.59e-01
trc_sqm_5000 ... 6.59e-01
cafe_count_5000 ... 9.96e-01
cafe_sum_5000_min_price_avg ... 2.10e-01
cafe_sum_5000_max_price_avg ... 2.04e-01
cafe_avg_price_5000 ... 2.07e-01
cafe_count_5000_na_price ... 9.88e-01
cafe_count_5000_price_500 ... 9.91e-01
cafe_count_5000_price_1000 ... 9.88e-01
cafe_count_5000_price_1500 ... 9.93e-01
cafe_count_5000_price_2500 ... 1.00e+00
cafe_count_5000_price_4000 ... 9.90e-01
cafe_count_5000_price_high ... 9.73e-01
big_church_count_5000 ... 9.61e-01
church_count_5000 ... 9.70e-01
mosque_count_5000 ... 5.29e-01
leisure_count_5000 ... 9.86e-01
sport_count_5000 ... 8.24e-01
market_count_5000 ... 4.32e-01
price_doc ... 2.26e-01
cafe_count_5000_price_4000 \
id 2.30e-02
full_sq 4.01e-02
life_sq 3.16e-02
floor -2.96e-02
max_floor -3.37e-02
material 4.37e-02
build_year -9.06e-04
num_room 5.74e-02
kitch_sq -8.28e-03
state -2.28e-02
area_m -1.58e-01
raion_popul -1.83e-03
green_zone_part -2.40e-01
indust_part -1.27e-01
children_preschool -2.73e-02
preschool_quota -2.49e-01
preschool_education_centers_raion 1.67e-02
children_school 2.38e-02
school_quota -1.60e-01
school_education_centers_raion 5.04e-02
school_education_centers_top_20_raion 3.23e-01
hospital_beds_raion 1.72e-01
healthcare_centers_raion 2.74e-01
university_top_20_raion 6.74e-01
sport_objects_raion 6.61e-01
additional_education_raion 3.98e-01
culture_objects_top_25_raion 7.43e-01
shopping_centers_raion 5.39e-01
office_raion 8.97e-01
full_all -2.30e-02
... ...
big_church_count_3000 8.97e-01
church_count_3000 9.00e-01
mosque_count_3000 5.30e-01
leisure_count_3000 8.88e-01
sport_count_3000 7.73e-01
market_count_3000 3.10e-01
green_part_5000 -3.66e-01
prom_part_5000 3.28e-02
office_count_5000 9.67e-01
office_sqm_5000 9.19e-01
trc_count_5000 7.11e-01
trc_sqm_5000 6.22e-01
cafe_count_5000 9.80e-01
cafe_sum_5000_min_price_avg 2.35e-01
cafe_sum_5000_max_price_avg 2.29e-01
cafe_avg_price_5000 2.32e-01
cafe_count_5000_na_price 9.77e-01
cafe_count_5000_price_500 9.71e-01
cafe_count_5000_price_1000 9.64e-01
cafe_count_5000_price_1500 9.76e-01
cafe_count_5000_price_2500 9.90e-01
cafe_count_5000_price_4000 1.00e+00
cafe_count_5000_price_high 9.81e-01
big_church_count_5000 9.42e-01
church_count_5000 9.45e-01
mosque_count_5000 4.93e-01
leisure_count_5000 9.75e-01
sport_count_5000 7.79e-01
market_count_5000 3.80e-01
price_doc 2.10e-01
cafe_count_5000_price_high \
id 2.37e-02
full_sq 4.36e-02
life_sq 3.49e-02
floor -2.23e-02
max_floor -2.95e-02
material 4.44e-02
build_year 2.14e-04
num_room 5.81e-02
kitch_sq -9.21e-03
state -3.15e-02
area_m -1.55e-01
raion_popul -1.59e-02
green_zone_part -2.28e-01
indust_part -1.37e-01
children_preschool -3.65e-02
preschool_quota -2.45e-01
preschool_education_centers_raion 1.27e-02
children_school 1.17e-02
school_quota -1.51e-01
school_education_centers_raion 3.68e-02
school_education_centers_top_20_raion 3.24e-01
hospital_beds_raion 1.44e-01
healthcare_centers_raion 2.48e-01
university_top_20_raion 6.47e-01
sport_objects_raion 6.32e-01
additional_education_raion 3.95e-01
culture_objects_top_25_raion 7.32e-01
shopping_centers_raion 5.01e-01
office_raion 8.68e-01
full_all -1.78e-02
... ...
big_church_count_3000 8.47e-01
church_count_3000 8.61e-01
mosque_count_3000 5.07e-01
leisure_count_3000 8.64e-01
sport_count_3000 7.48e-01
market_count_3000 2.92e-01
green_part_5000 -3.47e-01
prom_part_5000 -1.08e-02
office_count_5000 9.40e-01
office_sqm_5000 9.03e-01
trc_count_5000 6.65e-01
trc_sqm_5000 5.85e-01
cafe_count_5000 9.57e-01
cafe_sum_5000_min_price_avg 2.34e-01
cafe_sum_5000_max_price_avg 2.28e-01
cafe_avg_price_5000 2.31e-01
cafe_count_5000_na_price 9.52e-01
cafe_count_5000_price_500 9.51e-01
cafe_count_5000_price_1000 9.41e-01
cafe_count_5000_price_1500 9.49e-01
cafe_count_5000_price_2500 9.73e-01
cafe_count_5000_price_4000 9.81e-01
cafe_count_5000_price_high 1.00e+00
big_church_count_5000 8.88e-01
church_count_5000 9.00e-01
mosque_count_5000 4.81e-01
leisure_count_5000 9.48e-01
sport_count_5000 7.53e-01
market_count_5000 3.53e-01
price_doc 2.14e-01
big_church_count_5000 \
id 1.28e-02
full_sq 2.69e-02
life_sq 1.78e-02
floor -4.44e-02
max_floor -4.82e-02
material 3.85e-02
build_year -1.59e-03
num_room 6.09e-02
kitch_sq -4.00e-03
state 1.58e-02
area_m -2.22e-01
raion_popul 4.23e-02
green_zone_part -2.81e-01
indust_part -7.89e-02
children_preschool 6.21e-03
preschool_quota -2.70e-01
preschool_education_centers_raion 4.05e-02
children_school 6.05e-02
school_quota -1.93e-01
school_education_centers_raion 9.18e-02
school_education_centers_top_20_raion 3.18e-01
hospital_beds_raion 2.17e-01
healthcare_centers_raion 3.43e-01
university_top_20_raion 6.89e-01
sport_objects_raion 6.68e-01
additional_education_raion 4.17e-01
culture_objects_top_25_raion 7.19e-01
shopping_centers_raion 5.95e-01
office_raion 8.95e-01
full_all 8.68e-03
... ...
big_church_count_3000 9.46e-01
church_count_3000 9.30e-01
mosque_count_3000 5.34e-01
leisure_count_3000 8.84e-01
sport_count_3000 8.18e-01
market_count_3000 3.96e-01
green_part_5000 -4.08e-01
prom_part_5000 1.48e-01
office_count_5000 9.77e-01
office_sqm_5000 9.13e-01
trc_count_5000 7.97e-01
trc_sqm_5000 6.93e-01
cafe_count_5000 9.70e-01
cafe_sum_5000_min_price_avg 1.47e-01
cafe_sum_5000_max_price_avg 1.42e-01
cafe_avg_price_5000 1.44e-01
cafe_count_5000_na_price 9.67e-01
cafe_count_5000_price_500 9.67e-01
cafe_count_5000_price_1000 9.68e-01
cafe_count_5000_price_1500 9.76e-01
cafe_count_5000_price_2500 9.61e-01
cafe_count_5000_price_4000 9.42e-01
cafe_count_5000_price_high 8.88e-01
big_church_count_5000 1.00e+00
church_count_5000 9.88e-01
mosque_count_5000 5.02e-01
leisure_count_5000 9.69e-01
sport_count_5000 8.26e-01
market_count_5000 4.83e-01
price_doc 1.99e-01
church_count_5000 mosque_count_5000 \
id 1.52e-02 -4.88e-02
full_sq 2.86e-02 2.16e-02
life_sq 1.81e-02 9.95e-03
floor -4.56e-02 -1.23e-02
max_floor -4.82e-02 -5.83e-02
material 4.47e-02 5.04e-02
build_year -9.90e-05 1.96e-02
num_room 6.52e-02 6.28e-02
kitch_sq -3.66e-03 1.32e-02
state 2.30e-02 9.19e-02
area_m -2.10e-01 -8.68e-02
raion_popul 4.78e-02 3.42e-03
green_zone_part -2.68e-01 -2.74e-01
indust_part -1.03e-01 -8.51e-02
children_preschool 3.65e-03 -3.62e-02
preschool_quota -2.68e-01 -2.16e-01
preschool_education_centers_raion 4.07e-02 -6.71e-02
children_school 5.89e-02 8.06e-03
school_quota -1.90e-01 -1.37e-01
school_education_centers_raion 9.40e-02 -3.85e-02
school_education_centers_top_20_raion 3.30e-01 2.68e-01
hospital_beds_raion 2.29e-01 1.00e-01
healthcare_centers_raion 3.49e-01 1.45e-01
university_top_20_raion 7.07e-01 5.03e-01
sport_objects_raion 6.72e-01 3.35e-01
additional_education_raion 4.30e-01 1.65e-01
culture_objects_top_25_raion 7.26e-01 4.06e-01
shopping_centers_raion 5.94e-01 3.03e-01
office_raion 8.95e-01 4.78e-01
full_all 1.03e-02 -8.58e-02
... ... ...
big_church_count_3000 9.32e-01 4.83e-01
church_count_3000 9.36e-01 5.17e-01
mosque_count_3000 5.66e-01 6.40e-01
leisure_count_3000 8.89e-01 4.77e-01
sport_count_3000 8.35e-01 4.96e-01
market_count_3000 4.21e-01 1.91e-01
green_part_5000 -3.79e-01 -2.62e-01
prom_part_5000 1.61e-01 1.18e-01
office_count_5000 9.83e-01 5.22e-01
office_sqm_5000 9.27e-01 5.32e-01
trc_count_5000 8.11e-01 5.36e-01
trc_sqm_5000 6.90e-01 5.11e-01
cafe_count_5000 9.80e-01 5.41e-01
cafe_sum_5000_min_price_avg 1.64e-01 1.63e-01
cafe_sum_5000_max_price_avg 1.58e-01 1.39e-01
cafe_avg_price_5000 1.61e-01 1.49e-01
cafe_count_5000_na_price 9.70e-01 5.23e-01
cafe_count_5000_price_500 9.79e-01 5.41e-01
cafe_count_5000_price_1000 9.80e-01 5.49e-01
cafe_count_5000_price_1500 9.83e-01 5.50e-01
cafe_count_5000_price_2500 9.70e-01 5.29e-01
cafe_count_5000_price_4000 9.45e-01 4.93e-01
cafe_count_5000_price_high 9.00e-01 4.81e-01
big_church_count_5000 9.88e-01 5.02e-01
church_count_5000 1.00e+00 5.50e-01
mosque_count_5000 5.50e-01 1.00e+00
leisure_count_5000 9.75e-01 4.99e-01
sport_count_5000 8.48e-01 5.18e-01
market_count_5000 5.14e-01 2.42e-01
price_doc 2.13e-01 1.75e-01
leisure_count_5000 sport_count_5000 \
id 2.07e-02 -1.17e-02
full_sq 3.02e-02 1.58e-03
life_sq 2.27e-02 -1.45e-02
floor -4.42e-02 -1.02e-01
max_floor -4.87e-02 -9.99e-02
material 4.45e-02 9.85e-02
build_year -8.30e-04 5.98e-03
num_room 5.87e-02 8.99e-02
kitch_sq -6.26e-03 1.69e-02
state -3.88e-03 1.94e-01
area_m -1.95e-01 -4.16e-01
raion_popul 1.87e-02 2.89e-01
green_zone_part -2.62e-01 -3.58e-01
indust_part -1.19e-01 1.08e-04
children_preschool -2.07e-02 2.05e-01
preschool_quota -2.61e-01 -3.68e-01
preschool_education_centers_raion 3.70e-02 2.09e-01
children_school 3.21e-02 2.50e-01
school_quota -1.81e-01 -2.78e-01
school_education_centers_raion 7.76e-02 2.83e-01
school_education_centers_top_20_raion 3.37e-01 3.04e-01
hospital_beds_raion 2.09e-01 3.19e-01
healthcare_centers_raion 3.14e-01 5.11e-01
university_top_20_raion 7.02e-01 5.94e-01
sport_objects_raion 6.65e-01 7.36e-01
additional_education_raion 4.16e-01 3.88e-01
culture_objects_top_25_raion 7.45e-01 5.37e-01
shopping_centers_raion 5.52e-01 5.62e-01
office_raion 9.04e-01 7.14e-01
full_all 7.71e-03 8.47e-02
... ... ...
big_church_count_3000 9.23e-01 7.28e-01
church_count_3000 9.27e-01 7.35e-01
mosque_count_3000 5.45e-01 4.73e-01
leisure_count_3000 9.05e-01 6.75e-01
sport_count_3000 8.04e-01 9.50e-01
market_count_3000 3.82e-01 5.97e-01
green_part_5000 -3.75e-01 -4.59e-01
prom_part_5000 8.42e-02 4.11e-01
office_count_5000 9.81e-01 8.53e-01
office_sqm_5000 9.22e-01 8.91e-01
trc_count_5000 7.50e-01 9.23e-01
trc_sqm_5000 6.39e-01 8.32e-01
cafe_count_5000 9.85e-01 8.56e-01
cafe_sum_5000_min_price_avg 1.70e-01 1.79e-02
cafe_sum_5000_max_price_avg 1.65e-01 1.72e-02
cafe_avg_price_5000 1.67e-01 1.75e-02
cafe_count_5000_na_price 9.71e-01 8.59e-01
cafe_count_5000_price_500 9.81e-01 8.64e-01
cafe_count_5000_price_1000 9.79e-01 8.81e-01
cafe_count_5000_price_1500 9.84e-01 8.49e-01
cafe_count_5000_price_2500 9.86e-01 8.24e-01
cafe_count_5000_price_4000 9.75e-01 7.79e-01
cafe_count_5000_price_high 9.48e-01 7.53e-01
big_church_count_5000 9.69e-01 8.26e-01
church_count_5000 9.75e-01 8.48e-01
mosque_count_5000 4.99e-01 5.18e-01
leisure_count_5000 1.00e+00 8.09e-01
sport_count_5000 8.09e-01 1.00e+00
market_count_5000 4.63e-01 7.34e-01
price_doc 2.00e-01 2.95e-01
market_count_5000 price_doc
id -2.56e-02 1.21e-01
full_sq -4.13e-02 3.42e-01
life_sq -5.03e-02 1.66e-01
floor -1.24e-01 1.17e-01
max_floor -1.13e-01 9.44e-02
material 7.64e-02 6.40e-02
build_year 7.36e-03 2.16e-03
num_room 6.17e-02 4.76e-01
kitch_sq 2.64e-02 2.87e-02
state 2.73e-01 1.21e-01
area_m -4.50e-01 -1.67e-01
raion_popul 4.20e-01 1.52e-01
green_zone_part -3.47e-01 -9.45e-02
indust_part 9.85e-02 -8.26e-02
children_preschool 3.11e-01 1.34e-01
preschool_quota -2.07e-01 -7.51e-02
preschool_education_centers_raion 3.30e-01 1.59e-01
children_school 3.45e-01 1.39e-01
school_quota -2.18e-01 -1.40e-02
school_education_centers_raion 4.14e-01 1.94e-01
school_education_centers_top_20_raion 1.69e-01 1.02e-01
hospital_beds_raion 3.62e-01 1.47e-01
healthcare_centers_raion 4.76e-01 1.90e-01
university_top_20_raion 3.29e-01 1.66e-01
sport_objects_raion 5.10e-01 2.53e-01
additional_education_raion 2.84e-01 5.77e-02
culture_objects_top_25_raion 2.46e-01 4.43e-02
shopping_centers_raion 4.16e-01 1.51e-01
office_raion 3.64e-01 1.46e-01
full_all 6.46e-02 2.53e-02
... ... ...
big_church_count_3000 3.93e-01 1.32e-01
church_count_3000 4.12e-01 1.36e-01
mosque_count_3000 2.33e-01 9.62e-02
leisure_count_3000 3.48e-01 1.10e-01
sport_count_3000 6.82e-01 2.91e-01
market_count_3000 7.91e-01 1.52e-01
green_part_5000 -4.47e-01 -1.58e-01
prom_part_5000 6.39e-01 8.78e-02
office_count_5000 4.91e-01 2.19e-01
office_sqm_5000 5.68e-01 2.70e-01
trc_count_5000 7.69e-01 2.89e-01
trc_sqm_5000 6.50e-01 2.68e-01
cafe_count_5000 4.90e-01 2.32e-01
cafe_sum_5000_min_price_avg -2.25e-01 3.22e-02
cafe_sum_5000_max_price_avg -2.22e-01 3.33e-02
cafe_avg_price_5000 -2.23e-01 3.29e-02
cafe_count_5000_na_price 4.87e-01 2.30e-01
cafe_count_5000_price_500 5.09e-01 2.27e-01
cafe_count_5000_price_1000 5.31e-01 2.40e-01
cafe_count_5000_price_1500 4.85e-01 2.33e-01
cafe_count_5000_price_2500 4.32e-01 2.26e-01
cafe_count_5000_price_4000 3.80e-01 2.10e-01
cafe_count_5000_price_high 3.53e-01 2.14e-01
big_church_count_5000 4.83e-01 1.99e-01
church_count_5000 5.14e-01 2.13e-01
mosque_count_5000 2.42e-01 1.75e-01
leisure_count_5000 4.63e-01 2.00e-01
sport_count_5000 7.34e-01 2.95e-01
market_count_5000 1.00e+00 1.94e-01
price_doc 1.94e-01 1.00e+00
[276 rows x 276 columns]
In [56]:
dataset = pysqldf("SELECT * FROM train as t inner join macro as m on t.timestamp = m.timestamp")
In [64]:
# shape
print(dataset.shape)
(30471, 392)
In [90]:
# Split-out validation dataset
#df.ix[:, df.columns != 'b']
array = dataset.values
X = array[:,2:3] #X = array[:,dataset.columns != 'price_doc']
Y = array[:,291]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size, random_state=seed)
In [76]:
print(Y)
[5850000 6000000 5700000 ..., 6970959 13500000 5600000]
In [78]:
print(array)
[[1 '2011-08-20' 43 ..., 64.12 23587.0 230310.0]
[2 '2011-08-23' 34 ..., 64.12 23587.0 230310.0]
[3 '2011-08-27' 43 ..., 64.12 23587.0 230310.0]
...,
[30471 '2015-06-30' 45 ..., nan nan 234576.9]
[30472 '2015-06-30' 64 ..., nan nan 234576.9]
[30473 '2015-06-30' 43 ..., nan nan 234576.9]]
In [106]:
# Evaluate Algorithms
# Test options and evaluation metric
num_folds = 10
seed = 7
scoring = 'mean_squared_error'
In [107]:
# Spot Check Algorithms
models = []
models.append(('LR', LinearRegression()))
models.append(('LASSO', Lasso()))
models.append(('EN', ElasticNet()))
models.append(('KNN', KNeighborsRegressor()))
models.append(('CART', DecisionTreeRegressor()))
models.append(('SVR', SVR()))
In [108]:
# evaluate each model in turn
results = []
names = []
for name, model in models:
kfold = KFold(n_splits=num_folds, random_state=seed)
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
results.append(cv_results)
names.append(name)
msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
print(msg)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
LR: -34711135356222.394531 (45272989269429.421875)
LASSO: -34711134909434.074219 (45272987785504.093750)
EN: -34683327938797.511719 (45187146918796.304688)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
KNN: -16158756355476.378906 (1830174867078.940918)
CART: -15415605741976.025391 (1608718215632.856689)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\metrics\scorer.py:90: DeprecationWarning: Scoring method mean_squared_error was renamed to neg_mean_squared_error in version 0.18 and will be removed in 0.20.
sample_weight=sample_weight)
SVR: -24119211273959.695312 (2915251321042.989746)
In [109]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()
In [95]:
# Standardize the dataset
pipelines = []
pipelines.append(('ScaledLR', Pipeline([('Scaler', StandardScaler()),('LR', LinearRegression())])))
pipelines.append(('ScaledLASSO', Pipeline([('Scaler', StandardScaler()),('LASSO', Lasso())])))
pipelines.append(('ScaledEN', Pipeline([('Scaler', StandardScaler()),('EN', ElasticNet())])))
pipelines.append(('ScaledKNN', Pipeline([('Scaler', StandardScaler()),('KNN', KNeighborsRegressor())])))
pipelines.append(('ScaledCART', Pipeline([('Scaler', StandardScaler()),('CART', DecisionTreeRegressor())])))
pipelines.append(('ScaledSVR', Pipeline([('Scaler', StandardScaler()),('SVR', SVR())])))
results = []
names = []
for name, model in pipelines:
kfold = KFold(n_splits=num_folds, random_state=seed)
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
results.append(cv_results)
names.append(name)
msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
print(msg)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledLR: -34711135356222.406250 (45272989269429.437500)
ScaledLASSO: -34711125816995.992188 (45272954506018.281250)
ScaledEN: -27200571220099.667969 (19544737736403.980469)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledKNN: -16152756589672.191406 (1740203435588.155518)
ScaledCART: -15371550460683.593750 (1583210166537.577881)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledSVR: -24114630241543.226562 (2915587084734.815430)
In [96]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Scaled Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()
In [97]:
# KNN Algorithm tuning
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
k_values = numpy.array([1,3,5,7,9,11,13,15,17,19,21])
param_grid = dict(n_neighbors=k_values)
model = KNeighborsRegressor()
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
In [98]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print("%f (%f) with: %r" % (mean, stdev, param))
Best: -14596345046410.951172 using {'n_neighbors': 21}
-26497392409359.203125 (2361332515441.965820) with: {'n_neighbors': 1}
-17622692288782.410156 (1872238336256.748047) with: {'n_neighbors': 3}
-16112985526352.214844 (1842869613360.297119) with: {'n_neighbors': 5}
-15177572996765.607422 (1559426255986.728271) with: {'n_neighbors': 7}
-14943419853364.791016 (1575532612677.548096) with: {'n_neighbors': 9}
-14978953269019.136719 (1745870368172.335449) with: {'n_neighbors': 11}
-14727470323261.951172 (1627753392377.241455) with: {'n_neighbors': 13}
-14679668617321.728516 (1646138821268.150879) with: {'n_neighbors': 15}
-14630239174528.425781 (1618557504403.530273) with: {'n_neighbors': 17}
-14639421057143.099609 (1562043429176.597168) with: {'n_neighbors': 19}
-14596345046410.951172 (1532127548398.131836) with: {'n_neighbors': 21}
In [99]:
# ensembles
ensembles = []
ensembles.append(('ScaledAB', Pipeline([('Scaler', StandardScaler()),('AB', AdaBoostRegressor())])))
ensembles.append(('ScaledGBM', Pipeline([('Scaler', StandardScaler()),('GBM', GradientBoostingRegressor())])))
ensembles.append(('ScaledRF', Pipeline([('Scaler', StandardScaler()),('RF', RandomForestRegressor())])))
ensembles.append(('ScaledET', Pipeline([('Scaler', StandardScaler()),('ET', ExtraTreesRegressor())])))
results = []
names = []
for name, model in ensembles:
kfold = KFold(n_splits=num_folds, random_state=seed)
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
results.append(cv_results)
names.append(name)
msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
print(msg)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledAB: -15825171304182.421875 (1657486073931.083008)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledGBM: -14349558101245.152344 (1422625638623.679199)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledRF: -14613902738748.160156 (1613183452045.770264)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
ScaledET: -14864331327304.714844 (1683386353177.617920)
In [100]:
# Compare Algorithms
fig = pyplot.figure()
fig.suptitle('Scaled Ensemble Algorithm Comparison')
ax = fig.add_subplot(111)
pyplot.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()
In [101]:
# Tune scaled GBM
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = dict(n_estimators=numpy.array([50,100,150,200,250,300,350,400]))
model = GradientBoostingRegressor(random_state=seed)
kfold = KFold(n_splits=num_folds, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print("%f (%f) with: %r" % (mean, stdev, param))
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
Best: -14111937354387.152344 using {'n_estimators': 50}
-14111937354387.152344 (1544746649249.499023) with: {'n_estimators': 50}
-14361387459812.384766 (1414919657662.410400) with: {'n_estimators': 100}
-14640703301797.687500 (1443128276519.089844) with: {'n_estimators': 150}
-14789902518443.281250 (1468842412825.094971) with: {'n_estimators': 200}
-14892872571699.949219 (1476677639653.514404) with: {'n_estimators': 250}
-14981067896095.742188 (1461347251429.686523) with: {'n_estimators': 300}
-15062517628067.623047 (1483039554772.742188) with: {'n_estimators': 350}
-15108379792398.576172 (1500342843831.599854) with: {'n_estimators': 400}
In [102]:
# Make predictions on validation dataset
# prepare the model
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
model = GradientBoostingRegressor(random_state=seed, n_estimators=400)
model.fit(rescaledX, Y_train)
# transform the validation dataset
rescaledValidationX = scaler.transform(X_validation)
predictions = model.predict(rescaledValidationX)
print(mean_squared_error(Y_validation, predictions))
C:\Users\usjry\AppData\Local\Continuum\Anaconda3\lib\site-packages\sklearn\utils\validation.py:429: DataConversionWarning: Data with input dtype object was converted to float64 by StandardScaler.
warnings.warn(msg, _DataConversionWarning)
1.23132209471e+13
In [ ]:
Content source: jsphyg/ml_practice_notebooks
Similar notebooks: